In [ ]:
In [2]:
import scipy
import scanpy as sc
import pandas as pd
import numpy as np
import stlearn as st
import matplotlib.pyplot as plt
import os
In [22]:
os.chdir("/QRISdata/Q4386/curio/breast/Human_breast_cancer_3by3_v1pt1/")
In [2]:
os.chdir("/scratch/project/stseq/Prakrithi/curio/Human_breast_cancer_3by3_v1pt1_input/Human_breast_cancer_3by3_v1pt1")
In [2]:
os.chdir("/scratch/project/stseq/Prakrithi/curio/Human_breast_cancer_3by3_v1pt1_input/Human_breast_cancer_3by3_v1pt1")
--------------------------------------------------------------------------- FileNotFoundError Traceback (most recent call last) Cell In[2], line 1 ----> 1 os.chdir("/scratch/project/stseq/Prakrithi/curio/Human_breast_cancer_3by3_v1pt1_input/Human_breast_cancer_3by3_v1pt1") FileNotFoundError: [Errno 2] No such file or directory: '/scratch/project/stseq/Prakrithi/curio/Human_breast_cancer_3by3_v1pt1_input/Human_breast_cancer_3by3_v1pt1'
In [23]:
uA=np.transpose(pd.read_csv('uTARs_mincell_filtered_mat_final.txt', sep="\t", header=0, index_col=0))
spatialuA=pd.read_csv('spatial_row_col', sep="\t", header=0)
breast_uTARs = st.create_stlearn(count=uA,spatial=spatialuA,library_id="A",scale=0.248)
In [40]:
st.pl.QC_plot(breast_uTARs, spot_size=(1,2), tissue_alpha=0.4, dpi=900,cmap="Reds")#,name="MBA_uTARQC.pdf", output="/QRISdata/Q4386/polyA_FreshFrozen/QCplots/")
In [57]:
sc.pl.spatial(breast_uTARs,alpha_img=0.6, color =['chr5_11931649_11933299_+_783_0','chr18_63063649_63064549_-_5375_0','chr17_49855249_49858099_+_262_0','chr6_6676899_6678299_-_97_0',
'chr1_91049599_91049999_+_7710_0','chr10_37391399_37422749_+_39822_0','chr1_31439949_31455549_-_5769_0'],cmap='Reds',spot_size=50, vmax=0.1)
#uncharacterized LOC107984223 chr10_37391399_37422749_+_39822_0
In [ ]:
sc.pl.spatial(breast_uTARs,alpha_img=0.6, color =['chr21_43784699_43785949_+_29_0','chr6_6676899_6678299_-_97_0','chr1_91050599_91051949_+_62_0'],cmap='Reds',spot_size=50, vmax=0.1)
#uncharacterized LOC107984223 chr10_37391399_37422749_+_39822_0
In [21]:
row_idx = uAs.var_names.get_loc('chr5_11931649_11933299_+_783_0')
Spatially variable uTARs¶
In [24]:
sc.pl.spatial(breast_uTARs,alpha_img=0.6, color =['chr13_22998649_23006399_-_10358_0','chr4_124305749_124320749_+_8075_0',
'chr2_105223299_105224399_+_2070_0',
'chr17_21966899_21984599_+_24995_0',
'chr20_31161799_31191899_+_9716_0'],cmap='Reds',spot_size=50, vmax=0.1)
In [27]:
import matplotlib.pyplot as plt
with plt.rc_context({"figure.figsize": (8, 8), "figure.dpi": (300)}):
sc.pl.spatial(breast_uTARs,alpha_img=0.6, color =['chr2_105223299_105224399_+_2070_0'],cmap='inferno',spot_size=50, vmax=0.1, show=False)
plt.savefig("/scratch/project/stseq/Prakrithi/STOMICS/cuTAR144737_curio_breast.pdf", bbox_inches="tight")
Genes¶
In [21]:
uAg=np.transpose(pd.read_csv('genes_mincell_filtered_matrix.txt', sep="\t", header=0, index_col=0))
spatialuAg=pd.read_csv('spatial_row_col_genes', sep="\t", header=0)
breast_genes = st.create_stlearn(count=uAg,spatial=spatialuAg,library_id="A",scale=0.248)
In [4]:
st.pl.QC_plot(breast_genes, spot_size=(1,2), tissue_alpha=0.4, dpi=900,cmap="Reds")
In [10]:
sc.pl.spatial(breast_genes,alpha_img=0.6, color =['GAPDH','MALAT1','EPCAM','IGKC'],cmap='Spectral_r',spot_size=30,vmax=10)
In [11]:
sc.pl.spatial(breast_genes,alpha_img=0.6, color =['TINCR', 'IRAG2','MGP','CYB561','NME1','TOB1','CLU','CR2'],cmap='Spectral_r',spot_size=30,vmax=2)
In [26]:
sc.pl.spatial(breast_genes,alpha_img=0.6, color =['TINCR', 'IRAG2','MGP','CYB561'],cmap='Spectral_r', spot_size=30)
In [25]:
sc.pl.spatial(breast_genes,alpha_img=0.6, color =['BIRC5'],cmap='Reds',vmax=2, spot_size=30)
Curio pipeline genes¶
In [6]:
curio_genes = sc.read_h5ad('../../Human_breast_cancer_3by3_v1pt1/Human_breast_cancer_v1pt1_anndata.h5ad')
sc.pl.spatial(curio_genes,alpha_img=0.6, color =['TINCR', 'IRAG2','MGP','CYB561'],cmap='Spectral_r',vmax=2, spot_size=30)
In [5]:
sc.pl.spatial(curio_genes,alpha_img=0.6, color =['GAPDH','MALAT1','EPCAM','IGKC'],cmap='Spectral_r',vmax=10, spot_size=30)
SpatDE¶
In [1]:
import NaiveDE
import SpatialDE
import anndata as ad
import numpy as np
In [ ]:
# Assuming `adata` is your AnnData object containing Visium data
# Calculate total counts for each barcode (spot)
breast_uTARs.obs['total_counts'] = breast_uTARs.X.sum(axis=1)
# Display the first few total counts
print(breast_uTARs.obs['total_counts'].head())
TTGGTCAAACGGGG 100 CGTCTGCAGGCCAA 76 AATTCACGCCTACC 95 GCTAGAGCAGATCG 46 CAGCAAGAGCCTCC 80 Name: total_counts, dtype: int64
In [41]:
counts = uA.T[uA.sum(0) >= 3].T
counts
Out[41]:
| GENE | chr10_100022799_100025349_+_62_0 | chr10_100029249_100032199_+_103_0 | chr10_100096949_100098099_+_40_0 | chr10_100145399_100146199_-_63_0 | chr10_100186099_100187449_+_102_0 | chr10_100484449_100486349_+_39_0 | chr10_100563349_100565049_+_136_0 | chr10_100568299_100569549_+_31_0 | chr10_100570899_100572699_+_162_0 | chr10_100573249_100575549_-_31_0 | ... | chrY_56825699_56837199_-_1069_0 | chrY_56867549_56869199_-_126_0 | chrY_56875449_56876599_+_59_0 | chrY_56880799_56881949_+_52_0 | chrY_56883199_56886099_+_135_0 | chrY_6749099_6749849_+_51_0 | chrY_7136299_7137599_+_74_0 | chrY_7443299_7443549_-_45_0 | chrY_8232549_8232699_-_181_0 | chrY_9629449_9631199_+_33_0 |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| TTGGTCAAACGGGG | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| CGTCTGCAGGCCAA | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| AATTCACGCCTACC | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| GCTAGAGCAGATCG | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| CAGCAAGAGCCTCC | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| TTGCGCCGCAGTCG | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| GTGTTGAGCTCCGA | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| TTTTTTCCTCGCAG | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| TTTAGGTATCTCAA | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| TCCCGCTAGCAAAG | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
51174 rows × 27401 columns
In [8]:
counts=np.transpose(pd.read_csv('uTARs_in_Alex_samples_mat_mincell.txt', sep="\t", header=0, index_col=0))
counts = counts.T[counts.sum(0) >= 3].T
counts.head()
Out[8]:
| GENE | chr10_100022799_100025349_+_62_0 | chr10_100096949_100098099_+_40_0 | chr10_100145399_100146199_-_63_0 | chr10_100186099_100187449_+_102_0 | chr10_100484449_100486349_+_39_0 | chr10_100568299_100569549_+_31_0 | chr10_100570899_100572699_+_162_0 | chr10_100896449_100900849_-_73_0 | chr10_101043349_101043799_+_29_0 | chr10_101053899_101057549_+_287_0 | ... | chrY_14180649_14180799_-_83_0 | chrY_15030199_15032799_-_28_0 | chrY_15882949_15883099_-_5492_0 | chrY_15896049_15896899_+_3426_0 | chrY_16912899_16915549_+_83_0 | chrY_22274399_22277699_+_108_0 | chrY_56875449_56876599_+_59_0 | chrY_56883199_56886099_+_135_0 | chrY_6749099_6749849_+_51_0 | chrY_8232549_8232699_-_181_0 |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| TTGGTCAAACGGGG | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| CGTCTGCAGGCCAA | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| AATTCACGCCTACC | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 |
| GCTAGAGCAGATCG | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| CAGCAAGAGCCTCC | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
5 rows × 12324 columns
In [9]:
counts=counts+1
In [9]:
print(counts.shape)
print(sample_info.shape)
print(norm_expr.shape)
(51174, 12324) (51174, 3) (51174, 12324)
In [20]:
pip show numpy
WARNING: Ignoring invalid distribution -cikit-learn (/home/s4716765/.local/lib/python3.8/site-packages) Name: numpy Version: 1.21.6 Summary: NumPy is the fundamental package for array computing with Python. Home-page: https://www.numpy.org Author: Travis E. Oliphant et al. Author-email: License: BSD Location: /home/s4716765/.local/lib/python3.8/site-packages Requires: Required-by: access, anndata, biopython, bokeh, contourpy, gensim, geosketch, h5py, harmonypy, imageio, inequality, Keras-Applications, Keras-Preprocessing, libpysal, lightning, mapclassify, matplotlib, mgwr, mudata, muon, NaiveDE, numba, opt-einsum, pandas, patsy, pointpats, pygeos, PyWavelets, quantecon, rasterio, rasterstats, scanorama, scanpy, scikit-image, scikit-learn, scipy, seaborn, segregation, shapely, snuggs, spaghetti, SpatialDE, spglm, spint, splot, spopt, spreg, spvcm, statsmodels, stlearn, tensorboard, tensorflow, tifffile, tobler, torchmetrics, umap-learn, xgboost, yoctol-keras-layer-zoo Note: you may need to restart the kernel to use updated packages.
In [ ]:
In [2]:
import sys
print(sys.executable)
print("Numpy location:", np.__file__)
/home/s4716765/.conda/envs/stlearn_heart/bin/python Numpy location: /home/s4716765/.local/lib/python3.8/site-packages/numpy/__init__.py
In [14]:
sample_info = pd.read_csv('spatDE_metadata_mingene3.txt',sep="\t", header=0, index_col=0) # ERROR CAZ SOME CELLS HAD 0 COUNTS. REMOVE THOSE CELLS IN THAT CASE
counts=counts.loc[counts.index.intersection(sample_info.index)]
counts
Out[14]:
| GENE | chr10_100022799_100025349_+_62_0 | chr10_100096949_100098099_+_40_0 | chr10_100145399_100146199_-_63_0 | chr10_100186099_100187449_+_102_0 | chr10_100484449_100486349_+_39_0 | chr10_100568299_100569549_+_31_0 | chr10_100570899_100572699_+_162_0 | chr10_100896449_100900849_-_73_0 | chr10_101043349_101043799_+_29_0 | chr10_101053899_101057549_+_287_0 | ... | chrY_14180649_14180799_-_83_0 | chrY_15030199_15032799_-_28_0 | chrY_15882949_15883099_-_5492_0 | chrY_15896049_15896899_+_3426_0 | chrY_16912899_16915549_+_83_0 | chrY_22274399_22277699_+_108_0 | chrY_56875449_56876599_+_59_0 | chrY_56883199_56886099_+_135_0 | chrY_6749099_6749849_+_51_0 | chrY_8232549_8232699_-_181_0 |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| TTGGTCAAACGGGG | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | ... | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| CGTCTGCAGGCCAA | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | ... | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| AATTCACGCCTACC | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | ... | 1 | 1 | 1 | 1 | 2 | 1 | 1 | 1 | 1 | 1 |
| GCTAGAGCAGATCG | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | ... | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| CAGCAAGAGCCTCC | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | ... | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| TTCGGCGAGCGGTC | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | ... | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| AACCGGTGTCCACA | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | ... | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| CGCCAGAGTCCTAA | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | ... | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| TGAGACTCTATTTA | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | ... | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| ATGCAACGCGCGTT | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | ... | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
23910 rows × 12324 columns
In [15]:
norm_expr = NaiveDE.stabilize(counts.T).T
resid_expr = NaiveDE.regress_out(sample_info, norm_expr.T, 'np.log(total_counts)').T
In [20]:
sample_resid_expr = resid_expr.sample(n=1000, axis=1, random_state=1)
X = sample_info[['x', 'y']]
#results = SpatialDE.run(X, sample_resid_expr)
results = SpatialDE.run(np.array(X), sample_resid_expr)
Models: 0%| | 0/10 [00:00<?, ?it/s]
0%| | 0/1000 [00:00<?, ?it/s]
0%| | 0/1000 [00:00<?, ?it/s]
0%| | 0/1000 [00:00<?, ?it/s]
0%| | 0/1000 [00:00<?, ?it/s]
0%| | 0/1000 [00:00<?, ?it/s]
0%| | 0/1000 [00:00<?, ?it/s]
0%| | 0/1000 [00:00<?, ?it/s]
0%| | 0/1000 [00:00<?, ?it/s]
0%| | 0/1000 [00:00<?, ?it/s]
0%| | 0/1000 [00:00<?, ?it/s]
In [21]:
results.head()
Out[21]:
| FSV | M | g | l | max_delta | max_ll | max_mu_hat | max_s2_t_hat | model | n | s2_FSV | s2_logdelta | time | BIC | max_ll_null | LLR | pval | qval | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0.999955 | 4 | chr16_48493499_48494949_+_72_0 | 0.70767 | 0.000045 | 55677.911720 | -0.694482 | 0.482812 | SE | 23910 | 1492.340209 | 5.243904e+11 | 0.004383 | -111315.495231 | 55677.897630 | 0.014089 | 0.905515 | 0.949501 |
| 1 | 0.999955 | 4 | chr8_56296449_56297249_-_33_0 | 0.70767 | 0.000045 | 64756.456167 | -0.694383 | 0.482379 | SE | 23910 | 1515.696452 | 5.325975e+11 | 0.004405 | -129472.584126 | 64756.442302 | 0.013865 | 0.906266 | 0.949501 |
| 2 | 0.999955 | 4 | chr8_89367849_89369949_-_161_0 | 0.70767 | 0.000045 | 52682.399520 | -0.696059 | 0.485163 | SE | 23910 | 1526.864389 | 5.365218e+11 | 0.004248 | -105324.470832 | 52682.385705 | 0.013815 | 0.906434 | 0.949501 |
| 3 | 0.999955 | 4 | chr2_42764899_42766849_+_99_0 | 0.70767 | 0.000045 | 64759.075312 | -0.694854 | 0.483032 | SE | 23910 | 1512.995851 | 5.316485e+11 | 0.004197 | -129477.822416 | 64759.061420 | 0.013892 | 0.906176 | 0.949501 |
| 4 | 0.999955 | 4 | chr8_33307899_33308549_-_93_0 | 0.70767 | 0.000045 | 67773.092187 | -0.695065 | 0.483269 | SE | 23910 | 1580.821025 | 5.554815e+11 | 0.004241 | -135505.856166 | 67773.078814 | 0.013373 | 0.907937 | 0.949501 |
In [22]:
results.to_csv('Breast_SpatDE_results.txt', sep='\t', index=False)
In [ ]:
In [ ]:
In [ ]:
In [12]:
design_matrix = np.vstack([np.ones(sample_info.shape[0]), np.log(sample_info['total_counts'])]).T
In [13]:
design_matrix
Out[13]:
array([[1. , 4.60517019],
[1. , 4.33073334],
[1. , 4.55387689],
...,
[1. , -inf],
[1. , -inf],
[1. , -inf]])
In [ ]:
sample_resid_expr = resid_expr.sample(n=10000, axis=1, random_state=1)
X = sample_info[['x', 'y']]
results = SpatialDE.run(X, sample_resid_expr)
In [39]:
sample_info
Out[39]:
| total_counts | x | y | |
|---|---|---|---|
| GENE | |||
| TTGGTCAAACGGGG | 100 | 711.0160 | 727.1112 |
| CGTCTGCAGGCCAA | 76 | 924.4944 | 425.7168 |
| AATTCACGCCTACC | 95 | 781.6960 | 1334.8104 |
| GCTAGAGCAGATCG | 46 | 373.5128 | 401.0656 |
| CAGCAAGAGCCTCC | 80 | 1119.9680 | 612.0392 |
| ... | ... | ... | ... |
| TTGCGCCGCAGTCG | 0 | 1141.9656 | 1294.4112 |
| GTGTTGAGCTCCGA | 0 | 657.1008 | 452.1040 |
| TTTTTTCCTCGCAG | 0 | 1005.8632 | 533.3488 |
| TTTAGGTATCTCAA | 0 | 619.0328 | 522.8832 |
| TCCCGCTAGCAAAG | 0 | 1130.3840 | 1240.7440 |
51174 rows × 3 columns
Moran's¶
In [3]:
m=np.transpose(pd.read_csv('gene_uTAR_merged.txt', sep="\t", header=0, index_col=0))
spatialm=pd.read_csv('spatial_row_col', sep="\t", header=0)
gene_uTAR_merged = st.create_stlearn(count=m,spatial=spatialm,library_id="A",scale=0.248)
In [4]:
import joblib
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
#!pip install pysal
import pysal
from pysal.explore import esda
import pysal.lib as lps
from esda.moran import Moran, Moran_Local, Moran_BV, Moran_Local_BV
import splot
from splot.esda import moran_scatterplot, plot_moran, lisa_cluster, plot_moran_bv_simulation, plot_moran_bv, plot_local_autocorrelation
from libpysal.weights.contiguity import Queen
from libpysal import examples
import numpy as np
import os
import scanpy as sc
import stlearn as st
/home/s4716765/.local/lib/python3.8/site-packages/geopandas/_compat.py:124: UserWarning: The Shapely GEOS version (3.11.2-CAPI-1.17.2) is incompatible with the GEOS version PyGEOS was compiled with (3.10.4-CAPI-1.16.2). Conversions between both will be slow. warnings.warn( /scratch/temp/10916475/ipykernel_1753339/3900667329.py:3: DeprecationWarning: Shapely 2.0 is installed, but because PyGEOS is also installed, GeoPandas still uses PyGEOS by default. However, starting with version 0.14, the default will switch to Shapely. To force to use Shapely 2.0 now, you can either uninstall PyGEOS or set the environment variable USE_PYGEOS=0. You can do this before starting the Python process, or in your code before importing geopandas: import os os.environ['USE_PYGEOS'] = '0' import geopandas In the next release, GeoPandas will switch to using Shapely by default, even if PyGEOS is installed. If you only have PyGEOS installed to get speed-ups, this switch should be smooth. However, if you are using PyGEOS directly (calling PyGEOS functions on geometries from GeoPandas), this will then stop working and you are encouraged to migrate from PyGEOS to Shapely 2.0 (https://shapely.readthedocs.io/en/latest/migration_pygeos.html). import geopandas as gpd /home/s4716765/.local/lib/python3.8/site-packages/spaghetti/network.py:40: FutureWarning: The next major release of pysal/spaghetti (2.0.0) will drop support for all ``libpysal.cg`` geometries. This change is a first step in refactoring ``spaghetti`` that is expected to result in dramatically reduced runtimes for network instantiation and operations. Users currently requiring network and point pattern input as ``libpysal.cg`` geometries should prepare for this simply by converting to ``shapely`` geometries. warnings.warn(dep_msg, FutureWarning, stacklevel=1)
In [5]:
#spatial.iloc[1:5,:]
#spatial=spatial.set_index('BC')
spatialm=pd.read_csv('spatial_row_col', sep="\t", header=0)
spatialm.index =gene_uTAR_merged.obs.index
#data.obs=spatial.iloc[:,[0,1,2,3,4]]
from typing import Literal
library_id='A'
#data.obs.iloc[1:5,:]
_QUALITY = Literal["fulres", "hires", "lowres"]
quality: _QUALITY = "hires"
scale = gene_uTAR_merged.uns["spatial"][library_id]["scalefactors"][
"tissue_" + quality + "_scalef"]
image_coor = gene_uTAR_merged.obsm["spatial"] * scale
spatialm["imagecol"] = image_coor[:, 1]*0.208
spatialm["imagerow"] = image_coor[:, 0]*0.208
#s4 0.208 s6 0.2025
gene_uTAR_merged.obsm["gpd"] = gpd.GeoDataFrame(spatialm.iloc[:,:],geometry=gpd.points_from_xy(y=spatialm.imagerow,x=spatialm.imagecol))
In [31]:
import anndata
# Assuming spatial_df is your DataFrame and adata is your AnnData object
# 1. Get the row names of spatial_df
matching_cells = sample_info.index
# 2. Subset the AnnData object
gene_uTAR_mergedn = gene_uTAR_merged[gene_uTAR_merged.obs_names.isin(matching_cells)].copy()
# Now adata_subset contains only the observations that match the row names of spatial_df
In [64]:
## CALC MORAN'S INDEX
epr_df = gene_uTAR_merged.to_df()
#gene1_name="IRAK3"#"MSRB3"#"PFN1"#"ATOX1"
#"VHL"#"PTEN"#"TMSB10"#"ITM2B" #"PKM" #"ANXA2" #CCND1:chr19_9687799_9691349_-_3392_0
#gene2_name="12_66056899_66058349_+_736630_0"#"X_44794599_44795299_+_146087_0"#"18_63063049_63063849_-_11504_0"#"5_18162949_18163149_+_4437_0"
gene1_name="FUS"#""##"HNRNPD"#"FUS"#"ELAVL1"#"TIA1"#"EWSR1"#"TNRC6A"#"IGF2BP2"#"AGO4"#"AGO3"#"PTBP1"#"TAF15"#"IGF2BP1"#"SRSF1"#"IGF2BP3"#"FUS"#"TARDBP"#"AGO2" "AGO1"#"TAF15"#"MOV10"
gene2_name="chr10_37391399_37422749_+_39822_0"#"12_66057549_66058349_+_212175_0"#"1_91048949_91049999_+_26510_0"#""#"12_66056899_66058349_+_736630_0"#"5_18162949_18163149_+_4437_0"
# AKA cuTAR33004 cuTAR32998
gene_uTAR_merged.obsm["gpd"][gene1_name] = epr_df[gene1_name]
#y = pd.DataFrame(y[:,:1])
gene_uTAR_merged.obsm["gpd"][gene2_name] = epr_df[gene2_name]
w = Queen.from_dataframe(gene_uTAR_merged.obsm["gpd"])
x=np.array(epr_df[gene1_name]).astype(int) #need to set type to int or float
y=np.array(epr_df[gene2_name]).astype(int)
#calculate moran index
moran = Moran(y,w)
moran_loc_bv = Moran_Local_BV(y, x, w)
fig, ax = plt.subplots(figsize=(4,18))
moran_scatterplot(moran_loc_bv, p=0.05, ax=ax)
ax.set_xlabel('Gene {}'.format(gene1_name))
ax.set_ylabel('Gene {}'.format(gene2_name))
plt.tight_layout()
plt.show()
/scratch/temp/10368256/ipykernel_1411758/3573628254.py:14: FutureWarning: `use_index` defaults to False but will default to True in future. Set True/False directly to control this behavior and silence this warning w = Queen.from_dataframe(gene_uTAR_merged.obsm["gpd"])
In [58]:
def plot_choropleth(gdf,
attribute_1,
attribute_2,
alpha=0.5,
scheme='Quantiles',
cmap='YlGnBu',
legend=True):
fig, axs = plt.subplots(2,1, figsize=(5, 8),
subplot_kw={'adjustable':'datalim'})
# Choropleth for attribute_1
gdf.plot(column=attribute_1, scheme=scheme, cmap=cmap,
legend=legend, legend_kwds={'loc': 'upper left',
'bbox_to_anchor': (0.92, 0.8)},
ax=axs[0], alpha=alpha, markersize=2)
axs[0].set_title('Choropleth plot for {}'.format(attribute_1), y=0.8)
axs[0].set_axis_off()
# Choropleth for attribute_2
gdf.plot(column=attribute_2, scheme=scheme, cmap=cmap,
legend=legend, legend_kwds={'loc': 'upper left',
'bbox_to_anchor': (0.92, 0.8)},
ax=axs[1], alpha=alpha, markersize=2)
axs[1].set_title('Choropleth plot for {}'.format(attribute_2), y=0.8)
axs[1].set_axis_off()
plt.tight_layout()
return fig, axs
# Use the modified function without the img argument
plot_choropleth(gene_uTAR_merged.obsm["gpd"],
gene1_name,
gene2_name)
plt.show()
/home/s4716765/.local/lib/python3.8/site-packages/mapclassify/classifiers.py:257: UserWarning: Not enough unique values in array to form 5 classes. Setting k to 2. warnings.warn( /home/s4716765/.local/lib/python3.8/site-packages/mapclassify/classifiers.py:257: UserWarning: Not enough unique values in array to form 5 classes. Setting k to 2. warnings.warn(
In [63]:
fig, axs = plt.subplots(1, 1, figsize=(5, 8),
subplot_kw={'adjustable':'datalim'})
# Main spatial correlation plot
lisa_cluster(moran_loc_bv,
gene_uTAR_merged.obsm["gpd"],
p=0.05,
figsize=(9, 9),
markersize=0.3,
**{"alpha": 1},
ax=axs,
legend_kwds={'loc': 'upper left',
'bbox_to_anchor': (0.92, 0.8)})
axs.set_title(gene1_name + ' vs ' + gene2_name, y=0.75)
axs.set_axis_off()
plt.show()
In [3]:
In [64]:
## CALC MORAN'S INDEX
epr_df = gene_uTAR_merged.to_df()
#gene1_name="IRAK3"#"MSRB3"#"PFN1"#"ATOX1"
#"VHL"#"PTEN"#"TMSB10"#"ITM2B" #"PKM" #"ANXA2" #CCND1:chr19_9687799_9691349_-_3392_0
#gene2_name="12_66056899_66058349_+_736630_0"#"X_44794599_44795299_+_146087_0"#"18_63063049_63063849_-_11504_0"#"5_18162949_18163149_+_4437_0"
gene1_name="BIRC5"#""##"HNRNPD"#"FUS"#"ELAVL1"#"TIA1"#"EWSR1"#"TNRC6A"#"IGF2BP2"#"AGO4"#"AGO3"#"PTBP1"#"TAF15"#"IGF2BP1"#"SRSF1"#"IGF2BP3"#"FUS"#"TARDBP"#"AGO2" "AGO1"#"TAF15"#"MOV10"
gene2_name="chr10_37391399_37422749_+_39822_0"#"12_66057549_66058349_+_212175_0"#"1_91048949_91049999_+_26510_0"#""#"12_66056899_66058349_+_736630_0"#"5_18162949_18163149_+_4437_0"
gene_uTAR_merged.obsm["gpd"][gene1_name] = epr_df[gene1_name]
#y = pd.DataFrame(y[:,:1])
gene_uTAR_merged.obsm["gpd"][gene2_name] = epr_df[gene2_name]
w = Queen.from_dataframe(gene_uTAR_merged.obsm["gpd"])
x=np.array(epr_df[gene1_name]).astype(int) #need to set type to int or float
y=np.array(epr_df[gene2_name]).astype(int)
#calculate moran index
moran = Moran(y,w)
moran_loc_bv = Moran_Local_BV(y, x, w)
fig, ax = plt.subplots(figsize=(4,18))
moran_scatterplot(moran_loc_bv, p=0.05, ax=ax)
ax.set_xlabel('Gene {}'.format(gene1_name))
ax.set_ylabel('Gene {}'.format(gene2_name))
plt.tight_layout()
plt.show()
def plot_choropleth(gdf,
attribute_1,
attribute_2,
alpha=0.5,
scheme='Quantiles',
cmap='YlGnBu',
legend=True):
fig, axs = plt.subplots(2,1, figsize=(5, 8),
subplot_kw={'adjustable':'datalim'})
# Choropleth for attribute_1
gdf.plot(column=attribute_1, scheme=scheme, cmap=cmap,
legend=legend, legend_kwds={'loc': 'upper left',
'bbox_to_anchor': (0.92, 0.8)},
ax=axs[0], alpha=alpha, markersize=2)
axs[0].set_title('Choropleth plot for {}'.format(attribute_1), y=0.8)
axs[0].set_axis_off()
# Choropleth for attribute_2
gdf.plot(column=attribute_2, scheme=scheme, cmap=cmap,
legend=legend, legend_kwds={'loc': 'upper left',
'bbox_to_anchor': (0.92, 0.8)},
ax=axs[1], alpha=alpha, markersize=2)
axs[1].set_title('Choropleth plot for {}'.format(attribute_2), y=0.8)
axs[1].set_axis_off()
plt.tight_layout()
return fig, axs
# Use the modified function without the img argument
plot_choropleth(gene_uTAR_merged.obsm["gpd"],
gene1_name,
gene2_name)
plt.show()
# Main spatial correlation plot
lisa_cluster(moran_loc_bv,
gene_uTAR_merged.obsm["gpd"],
p=0.05,
figsize=(9, 9),
markersize=0.15,
**{"alpha": 1},
ax=axs,
legend_kwds={'loc': 'upper left',
'bbox_to_anchor': (0.92, 0.8)})
axs.set_title(gene1_name + ' vs ' + gene2_name, y=0.75)
axs.set_axis_off()
plt.show()
/scratch/temp/10858931/ipykernel_3186245/3755851813.py:14: FutureWarning: `use_index` defaults to False but will default to True in future. Set True/False directly to control this behavior and silence this warning w = Queen.from_dataframe(gene_uTAR_merged.obsm["gpd"])
/home/s4716765/.local/lib/python3.8/site-packages/mapclassify/classifiers.py:257: UserWarning: Not enough unique values in array to form 5 classes. Setting k to 2. warnings.warn( /home/s4716765/.local/lib/python3.8/site-packages/mapclassify/classifiers.py:257: UserWarning: Not enough unique values in array to form 5 classes. Setting k to 2. warnings.warn(
<Figure size 640x480 with 0 Axes>
In [12]:
## CALC MORAN'S INDEX
epr_df = gene_uTAR_merged.to_df()
#gene1_name="IRAK3"#"MSRB3"#"PFN1"#"ATOX1"
#"VHL"#"PTEN"#"TMSB10"#"ITM2B" #"PKM" #"ANXA2" #CCND1:chr19_9687799_9691349_-_3392_0
#gene2_name="12_66056899_66058349_+_736630_0"#"X_44794599_44795299_+_146087_0"#"18_63063049_63063849_-_11504_0"#"5_18162949_18163149_+_4437_0"
gene1_name="BIRC5"#""##"HNRNPD"#"FUS"#"ELAVL1"#"TIA1"#"EWSR1"#"TNRC6A"#"IGF2BP2"#"AGO4"#"AGO3"#"PTBP1"#"TAF15"#"IGF2BP1"#"SRSF1"#"IGF2BP3"#"FUS"#"TARDBP"#"AGO2" "AGO1"#"TAF15"#"MOV10"
gene2_name="chr5_11931649_11933299_+_783_0"#"12_66057549_66058349_+_212175_0"#"1_91048949_91049999_+_26510_0"#""#"12_66056899_66058349_+_736630_0"#"5_18162949_18163149_+_4437_0"
gene_uTAR_merged.obsm["gpd"][gene1_name] = epr_df[gene1_name]
#y = pd.DataFrame(y[:,:1])
gene_uTAR_merged.obsm["gpd"][gene2_name] = epr_df[gene2_name]
w = Queen.from_dataframe(gene_uTAR_merged.obsm["gpd"])
x=np.array(epr_df[gene1_name]).astype(int) #need to set type to int or float
y=np.array(epr_df[gene2_name]).astype(int)
#calculate moran index
moran = Moran(y,w)
moran_loc_bv = Moran_Local_BV(y, x, w)
fig, ax = plt.subplots(figsize=(4,18))
moran_scatterplot(moran_loc_bv, p=0.05, ax=ax)
ax.set_xlabel('Gene {}'.format(gene1_name))
ax.set_ylabel('Gene {}'.format(gene2_name))
plt.tight_layout()
plt.show()
fig, axs = plt.subplots(1, 1, figsize=(5, 8),
subplot_kw={'adjustable':'datalim'})
###
def plot_choropleth(gdf,
attribute_1,
attribute_2,
alpha=0.5,
scheme='Quantiles',
cmap='YlGnBu',
legend=True):
fig, axs = plt.subplots(2,1, figsize=(5, 8),
subplot_kw={'adjustable':'datalim'})
# Choropleth for attribute_1
gdf.plot(column=attribute_1, scheme=scheme, cmap=cmap,
legend=legend, legend_kwds={'loc': 'upper left',
'bbox_to_anchor': (0.92, 0.8)},
ax=axs[0], alpha=alpha, markersize=2)
axs[0].set_title('Choropleth plot for {}'.format(attribute_1), y=0.8)
axs[0].set_axis_off()
# Choropleth for attribute_2
gdf.plot(column=attribute_2, scheme=scheme, cmap=cmap,
legend=legend, legend_kwds={'loc': 'upper left',
'bbox_to_anchor': (0.92, 0.8)},
ax=axs[1], alpha=alpha, markersize=2)
axs[1].set_title('Choropleth plot for {}'.format(attribute_2), y=0.8)
axs[1].set_axis_off()
plt.tight_layout()
return fig#, axs
# Use the modified function without the img argument
plot_choropleth(gene_uTAR_merged.obsm["gpd"],
gene1_name,
gene2_name)
plt.show()
###
# Main spatial correlation plot
lisa_cluster(moran_loc_bv,
gene_uTAR_merged.obsm["gpd"],
p=0.05,
figsize=(9, 9),
markersize=0.3,
**{"alpha": 1},
ax=axs,
legend_kwds={'loc': 'upper left',
'bbox_to_anchor': (0.92, 0.8)})
axs.set_title(gene1_name + ' vs ' + gene2_name, y=0.75)
axs.set_axis_off()
plt.show()
/scratch/temp/10858931/ipykernel_3186245/16448487.py:14: FutureWarning: `use_index` defaults to False but will default to True in future. Set True/False directly to control this behavior and silence this warning w = Queen.from_dataframe(gene_uTAR_merged.obsm["gpd"])
/home/s4716765/.local/lib/python3.8/site-packages/mapclassify/classifiers.py:257: UserWarning: Not enough unique values in array to form 5 classes. Setting k to 2. warnings.warn( /home/s4716765/.local/lib/python3.8/site-packages/mapclassify/classifiers.py:257: UserWarning: Not enough unique values in array to form 5 classes. Setting k to 2. warnings.warn(
<Figure size 640x480 with 0 Axes>
In [35]:
## CALC MORAN'S INDEX
epr_df = gene_uTAR_merged.to_df()
#gene1_name="IRAK3"#"MSRB3"#"PFN1"#"ATOX1"
#"VHL"#"PTEN"#"TMSB10"#"ITM2B" #"PKM" #"ANXA2" #CCND1:chr19_9687799_9691349_-_3392_0
#gene2_name="12_66056899_66058349_+_736630_0"#"X_44794599_44795299_+_146087_0"#"18_63063049_63063849_-_11504_0"#"5_18162949_18163149_+_4437_0"
gene1_name="TARDBP"#""##"HNRNPD"#"FUS"#"ELAVL1"#"TIA1"#"EWSR1"#"TNRC6A"#"IGF2BP2"#"AGO4"#"AGO3"#"PTBP1"#"TAF15"#"IGF2BP1"#"SRSF1"#"IGF2BP3"#"FUS"#"TARDBP"#"AGO2" "AGO1"#"TAF15"#"MOV10"
gene2_name="chr13_22998649_23006399_-_10358_0"#"12_66057549_66058349_+_212175_0"#"1_91048949_91049999_+_26510_0"#""#"12_66056899_66058349_+_736630_0"#"5_18162949_18163149_+_4437_0"
gene_uTAR_merged.obsm["gpd"][gene1_name] = epr_df[gene1_name]
#y = pd.DataFrame(y[:,:1])
gene_uTAR_merged.obsm["gpd"][gene2_name] = epr_df[gene2_name]
w = Queen.from_dataframe(gene_uTAR_merged.obsm["gpd"])
x=np.array(epr_df[gene1_name]).astype(int) #need to set type to int or float
y=np.array(epr_df[gene2_name]).astype(int)
#calculate moran index
moran = Moran(y,w)
moran_loc_bv = Moran_Local_BV(y, x, w)
fig, ax = plt.subplots(figsize=(4,18))
moran_scatterplot(moran_loc_bv, p=0.05, ax=ax)
ax.set_xlabel('Gene {}'.format(gene1_name))
ax.set_ylabel('Gene {}'.format(gene2_name))
plt.tight_layout()
plt.show()
fig, axs = plt.subplots(1, 1, figsize=(5, 8),
subplot_kw={'adjustable':'datalim'})
# Main spatial correlation plot
lisa_cluster(moran_loc_bv,
gene_uTAR_merged.obsm["gpd"],
p=0.05,
figsize=(9, 9),
markersize=0.3,
**{"alpha": 1},
ax=axs,
legend_kwds={'loc': 'upper left',
'bbox_to_anchor': (0.92, 0.8)})
axs.set_title(gene1_name + ' vs ' + gene2_name, y=0.75)
axs.set_axis_off()
plt.show()
/scratch/temp/10490146/ipykernel_1489797/524671805.py:14: FutureWarning: `use_index` defaults to False but will default to True in future. Set True/False directly to control this behavior and silence this warning w = Queen.from_dataframe(gene_uTAR_merged.obsm["gpd"])
In [34]:
gene_uTAR_mergedn = gene_uTAR_merged
In [7]:
## CALC MORAN'S INDEX
epr_df = gene_uTAR_merged.to_df()
gene1_name="ID3"#""##"HNRNPD"#"FUS"#"ELAVL1"#"TIA1"#"EWSR1"#"TNRC6A"#"IGF2BP2"#"AGO4"#"AGO3"#"PTBP1"#"TAF15"#"IGF2BP1"#"SRSF1"#"IGF2BP3"#"FUS"#"TARDBP"#"AGO2" "AGO1"#"TAF15"#"MOV10"
gene2_name="chr17_49855249_49858099_+_262_0"
gene_uTAR_merged.obsm["gpd"][gene1_name] = epr_df[gene1_name]
#y = pd.DataFrame(y[:,:1])
gene_uTAR_merged.obsm["gpd"][gene2_name] = epr_df[gene2_name]
w = Queen.from_dataframe(gene_uTAR_merged.obsm["gpd"])
x=np.array(epr_df[gene1_name]).astype(int) #need to set type to int or float
y=np.array(epr_df[gene2_name]).astype(int)
#calculate moran index
moran = Moran(y,w)
moran_loc_bv = Moran_Local_BV(y, x, w)
fig, ax = plt.subplots(figsize=(4,18))
moran_scatterplot(moran_loc_bv, p=0.05, ax=ax)
ax.set_xlabel('Gene {}'.format(gene1_name))
ax.set_ylabel('Gene {}'.format(gene2_name))
plt.tight_layout()
plt.show()
#
def plot_choropleth(gdf,
attribute_1,
attribute_2,
alpha=0.5,
scheme='Quantiles',
cmap='YlGnBu',
legend=True):
fig, axs = plt.subplots(2,1, figsize=(5, 8),
subplot_kw={'adjustable':'datalim'})
# Choropleth for attribute_1
gdf.plot(column=attribute_1, scheme=scheme, cmap=cmap,
legend=legend, legend_kwds={'loc': 'upper left',
'bbox_to_anchor': (0.92, 0.8)},
ax=axs[0], alpha=alpha, markersize=2)
axs[0].set_title('Choropleth plot for {}'.format(attribute_1), y=0.8)
axs[0].set_axis_off()
# Choropleth for attribute_2
gdf.plot(column=attribute_2, scheme=scheme, cmap=cmap,
legend=legend, legend_kwds={'loc': 'upper left',
'bbox_to_anchor': (0.92, 0.8)},
ax=axs[1], alpha=alpha, markersize=2)
axs[1].set_title('Choropleth plot for {}'.format(attribute_2), y=0.8)
axs[1].set_axis_off()
plt.tight_layout()
return fig#, axs
# Use the modified function without the img argument
plot_choropleth(gene_uTAR_merged.obsm["gpd"],
gene1_name,
gene2_name)
plt.show()
###
# Main spatial correlation plot
lisa_cluster(moran_loc_bv,
gene_uTAR_merged.obsm["gpd"],
p=0.05,
figsize=(9, 9),
markersize=0.3,
**{"alpha": 1},
ax=axs,
legend_kwds={'loc': 'upper left',
'bbox_to_anchor': (0.92, 0.8)})
axs.set_title(gene1_name + ' vs ' + gene2_name, y=0.75)
axs.set_axis_off()
plt.show()
/scratch/temp/10916475/ipykernel_1753339/139539757.py:11: FutureWarning: `use_index` defaults to False but will default to True in future. Set True/False directly to control this behavior and silence this warning w = Queen.from_dataframe(gene_uTAR_merged.obsm["gpd"])
/home/s4716765/.local/lib/python3.8/site-packages/mapclassify/classifiers.py:257: UserWarning: Not enough unique values in array to form 5 classes. Setting k to 2. warnings.warn( /home/s4716765/.local/lib/python3.8/site-packages/mapclassify/classifiers.py:257: UserWarning: Not enough unique values in array to form 5 classes. Setting k to 2. warnings.warn(
<Figure size 640x480 with 0 Axes>
In [ ]:
In [9]:
curio_genes_mel
Out[9]:
AnnData object with n_obs × n_vars = 297240 × 28803
obsm: 'X_spatial', 'spatial'
In [ ]:
count_df = pd.DataFrame(curio_genes_mel.X.toarray().T, # Transpose the matrix to match the desired shape
index=curio_genes_mel.var.index, # Genes as rows
columns=curio_genes_mel.obs.index) # Cells as columns
# Write the DataFrame to a tab-separated file
count_df.to_csv("curio_gene_matrix.txt", sep="\t", index=True, header=True)
In [8]:
# Read the count matrix text file into a DataFrame
count_df = pd.read_csv("Xenium_uTAR_mat.txt", sep="\t", index_col=0)
# Get the barcodes (row indices) from the AnnData object
adata_barcodes = curio_genes_mel.obs.index
# Check if the barcodes in the count matrix columns match with adata's barcodes
if list(count_df.columns) == list(adata_barcodes):
print("The barcodes are in the same order.")
else:
print("The barcodes are not in the same order.")
# Optionally, identify mismatched barcodes
# mismatches = [barcode for barcode, adata_barcode in zip(count_df.columns, adata_barcodes) if barcode != adata_barcode]
# print("Mismatched barcodes:", mismatches)
The barcodes are in the same order.
Melanoma¶
In [13]:
os.chdir("/scratch/project/stseq/Prakrithi/curio/Human_melanoma_10by10_input/Human_melanoma_10by10/")
Curio gene counts¶
In [3]:
curio_genes_mel = sc.read_h5ad('/QRISdata/Q4386/curio/Human_melanoma_10by10_anndata.h5ad')
In [ ]:
sc.pl.spatial(curio_genes_mel,alpha_img=0.6, color =['PMEL', 'MLANA','DCT','MALAT1'],cmap='Spectral_r',vmax=2, spot_size=30)
In [5]:
sc.pl.spatial(curio_genes_mel,alpha_img=0.6, color =['VGF', 'FN1','KRT10','SH3KBP1'],cmap='Spectral_r',vmax=2, spot_size=30)
In [13]:
sc.pl.spatial(curio_genes_mel,alpha_img=0.6, color =['NDRG1', 'BIRC5','ELAVL1','TARDBP'],cmap='Reds',vmax=2, spot_size=100)
In [ ]:
curio_genes_mel
In [105]:
import anndata
import numpy as np
# Define the gene of interest
gene2 = 'chr5_11929999_11935299_+_2148_0'
# Ensure the gene exists in adata2
if gene2 in Mel_uTARs.var_names:
# Create a boolean mask for cells where gene2 > 0 in adata2
mask = Mel_uTARs[:, gene2].X > 0
# Convert the boolean mask to a boolean array (needed for indexing)
boolean_mask = np.array(mask).flatten()
# Ensure the length of the boolean mask matches the number of cells in adata1
if len(boolean_mask) == Mel_uTARs.shape[0]:
# Subset adata1 based on the cells where gene2 > 0 in adata2
utar_subset = Mel_uTARs[boolean_mask].copy()
# Verify the result
print(f"Number of cells in adata1 where {gene2} > 0 in adata2: {utar_subset.shape[0]}")
print(utar_subset.obs.head())
else:
raise ValueError("The length of the boolean mask does not match the number of cells in adata2")
else:
raise ValueError(f"The gene '{gene2}' is not in adata2.var_names")
sc.pl.spatial(utar_subset,alpha_img=0.6, color =['BIRC5'],cmap='Reds',vmax=0.5, spot_size=200)
Number of cells in adata1 where chr5_11929999_11935299_+_2148_0 > 0 in adata2: 24
imagecol imagerow
GTGCGCCCTCGCAA 5094.00 6978.50
TAGTAACCTGACTG 6173.40 5723.95
CGTGAAGCGCAAGC 5444.50 6452.00
CACGCCTGTAGAGG 6355.45 5497.15
TGCGGCAAGTTAGT 4926.15 5336.50
In [133]:
gene_subset[:, "BIRC5"].X.toarray()
Out[133]:
array([[0.],
[0.],
[0.],
[0.],
[0.],
[0.],
[0.],
[0.],
[0.],
[0.],
[0.],
[0.],
[0.],
[0.],
[0.],
[0.],
[0.],
[0.],
[0.],
[0.],
[0.],
[0.],
[0.],
[0.]], dtype=float32)
In [ ]:
In [116]:
import anndata
import numpy as np
# Define the gene of interest
gene2 = 'chr5_11929999_11935299_+_2148_0'
# Ensure the gene exists in adata2
if gene2 in Mel_uTARs.var_names:
# Create a boolean mask for cells where gene2 > 0 in adata2
mask = Mel_uTARs[:, gene2].X > 0
# Convert the boolean mask to a boolean array (needed for indexing)
boolean_mask = np.array(mask).flatten()
# Ensure the length of the boolean mask matches the number of cells in adata1
if len(boolean_mask) == Mel_uTARs.shape[0]:
# Subset adata1 based on the cells where gene2 > 0 in adata2
gene_subset = curio_genes_mel[boolean_mask].copy()
# Verify the result
print(f"Number of cells in adata1 where {gene2} > 0 in adata2: {gene_subset.shape[0]}")
print(gene_subset.obs.head())
else:
raise ValueError("The length of the boolean mask does not match the number of cells in adata2")
else:
raise ValueError(f"The gene '{gene2}' is not in adata2.var_names")
Number of cells in adata1 where chr5_11929999_11935299_+_2148_0 > 0 in adata2: 24 Empty DataFrame Columns: [] Index: [GTGCGCCCTCGCAA, TAGTAACCTGACTG, CGTGAAGCGCAAGC, CACGCCTGTAGAGG, TGCGGCAAGTTAGT]
In [123]:
c5_pos=gene_subset.obs.index.to_list()
c5_pos
Out[123]:
['GTGCGCCCTCGCAA', 'TAGTAACCTGACTG', 'CGTGAAGCGCAAGC', 'CACGCCTGTAGAGG', 'TGCGGCAAGTTAGT', 'CCCTCAAAGTTTGA', 'GCGGCAGTTGACCG', 'TAAGCGAGTGTTGG', 'CGCTTGGGTAGCAT', 'TGCGCTATTCTAAC', 'TGCCTGCGGTCGTG', 'TACACCGAGATAGC', 'TGCTCTTCCAACAG', 'CTGGACTTCTGCTC', 'GTCGCGCTCGTCCG', 'ACTATGTTAACTCG', 'ATTATGAAGGCCTA', 'ATAGTTGCAAGGTG', 'CCTACGTCCTATTA', 'AAGGATGCCCCACG', 'AGATAAGGCGCTGT', 'AGCTCTTATTTCCG', 'ATCAGTAGCTATCA', 'GATGAGTGATGCAA']
In [101]:
sc.pl.spatial(gene_subset,alpha_img=0.6, color =['BIRC5'],cmap='Reds',vmax=0.5, spot_size=200)
uTARs¶
In [4]:
uM=np.transpose(pd.read_csv('/QRISdata/Q4386/curio/melanoma/interested_uTAR_mat.txt', sep="\t", header=0, index_col=0)) #interested_uTAR_mat_f.txt
spatialuM=pd.read_csv('/QRISdata/Q4386/curio/melanoma/spatial_row_col_rev', sep="\t", header=0)
Mel_uTARs = st.create_stlearn(count=uM,spatial=spatialuM,library_id="M",scale=0.5)
In [5]:
sc.pl.spatial(Mel_uTARs,alpha_img=0.6, color =['chr12_111603399_111610199_-_603_0','chr9_81829699_81841349_-_802_0','chr2_81555449_81557249_+_9979_0',
'chr15_80010899_80012049_+_328_0','chr2_149482749_149491499_+_1689_0',
'chr5_11929999_11935299_+_2148_0','chr17_49855199_49859399_+_535_0'], spot_size=250,cmap='Reds',vmax=0.5)
#'chr9_22405949_22410899_+_165_0',
In [28]:
sc.pl.spatial(Mel_uTARs,alpha_img=0.6, color =['chr7_118702349_118704699_+_474624_0','chr18_13935449_13945249_-_114626_0',
'chr10_31969999_31970199_+_12945_0','chr13_39795849_39798149_+_8404_0'], spot_size=150,cmap='Reds',vmax=0.5)
#'chr9_22405949_22410899_+_165_0',
In [10]:
sc.pl.spatial(Mel_uTARs,alpha_img=0.6, color =['chr6_6676749_6678349_-_285_0','chr15_80010899_80012049_+_328_0','chr17_49860099_49862399_+_346_0',
'chr12_28791749_28795299_-_2741_0','chr12_111603399_111610199_-_603_0','chr9_81829699_81841349_-_802_0'],
spot_size=150,cmap='Reds',vmax=0.5) #'chr9_22405949_22410899_+_165_0'
In [5]:
sc.pl.spatial(Mel_uTARs,alpha_img=0.6, color =['chr12_28791749_28795299_-_2741_0'],
spot_size=150,cmap='inferno',vmax=1) #'chr9_22405949_22410899_+_165_0'
In [5]:
sc.pp.filter_cells(curio_genes_mel, min_genes=100)
sc.pl.spatial(curio_genes_mel,alpha_img=0.6, color =['PMEL','MLANA'],spot_size=150,cmap='inferno')
In [18]:
curio_genes_mel
Out[18]:
AnnData object with n_obs × n_vars = 92326 × 28803
obs: 'n_genes'
obsm: 'X_spatial', 'spatial'
In [19]:
uM=np.transpose(pd.read_csv('/QRISdata/Q4386/curio/melanoma/interested_uTAR_mat_f.txt', sep="\t", header=0, index_col=0)) #interested_uTAR_mat_f.txt
spatialuM=pd.read_csv('/QRISdata/Q4386/curio/melanoma/spatial_row_col_rev', sep="\t", header=0)
Mel_uTARs = st.create_stlearn(count=uM,spatial=spatialuM,library_id="M",scale=0.5)
# Find common cells between the two objects
common_cells = Mel_uTARs.obs_names.intersection(curio_genes_mel.obs_names)
# Subset Mel_uTARs to keep only those common cells
Mel_uTARs = Mel_uTARs[common_cells].copy()
#sc.pp.filter_cells(Mel_uTARs, min_genes=)
sc.pl.spatial(Mel_uTARs,alpha_img=0.6, color =['chr10_31969999_31970199_+_12945_0','chr15_80010899_80012049_+_328_0'],
spot_size=150,cmap='inferno',vmax=1) #'chr9_22405949_22410899_+_165_0'
In [21]:
import matplotlib.pyplot as plt
with plt.rc_context({"figure.figsize": (8, 8), "figure.dpi": (300)}):
sc.pl.spatial(Mel_uTARs,alpha_img=0.6, color =['chr15_80010899_80012049_+_328_0'], show=False,
spot_size=150,cmap='inferno',vmax=1)
plt.savefig("/scratch/project/stseq/Prakrithi/STOMICS/cuTAR94762_curio_mel.pdf", bbox_inches="tight")
In [8]:
import numpy as np
# Calculate the total counts for each observation (cell)
obs_counts = Mel_uTARs.X.sum(axis=1)
# Keep only observations (cells) with counts >= 50
Mel_uTARs_f = Mel_uTARs[obs_counts >= 1].copy()
In [9]:
sc.pl.spatial(Mel_uTARs_f,alpha_img=0.6, color =['chr12_111603399_111610199_-_603_0','chr9_81829699_81841349_-_802_0','chr2_81555449_81557249_+_9979_0',
'chr15_80010899_80012049_+_328_0','chr2_149482749_149491499_+_1689_0',
'chr5_11929999_11935299_+_2148_0','chr17_49855199_49859399_+_535_0'], spot_size=250,cmap='Reds',vmax=1)
#'chr9_22405949_22410899_+_165_0',
In [16]:
curio_genes_mel[:, 'PMEL'].X
Out[16]:
<297240x1 sparse matrix of type '<class 'numpy.float32'>' with 91951 stored elements in Compressed Sparse Row format>
In [19]:
import numpy as np
import pandas as pd
# 1. Extract BIRC5 counts from the first AnnData (adata1)
if 'PMEL' in curio_genes_mel.var_names:
birc5_counts = curio_genes_mel[:, 'PMEL'].X
birc5_df = pd.DataFrame(birc5_counts.toarray(), index=curio_genes_mel.obs.index, columns=['PMEL'])
else:
print("PMEL is not found in adata1.")
# 2. Ensure that only the matching obs (cells) between adata1 and adata2 are selected
# We perform an inner join on the index to match the cells in both AnnData objects.
common_obs = Mel_uTARs.obs.index.intersection(birc5_df.index)
# 3. Filter the BIRC5 data to only the matching observations
birc5_matching = birc5_df.loc[common_obs]
# 4. Add BIRC5 counts as a new column to the adata2.obs dataframe
Mel_uTARs.obs['PMEL'] = np.nan # Initialize the column with NaNs
Mel_uTARs.obs.loc[common_obs, 'PMEL'] = birc5_matching['PMEL'].values # Add BIRC5 counts for matching cells
# 5. Verify the results
print(Mel_uTARs.obs[['PMEL']].dropna())
B=Mel_uTARs.obs[['PMEL']].dropna()
B=B.T
B = B.astype(int)
print(B)
B.to_csv("PMEL_counts.txt", sep="\t")
PMEL
ATGTATACGGAAGG 0.0
CCTCGGCAAACCGC 0.0
CGGCCATACGAATA 0.0
TGTGTCCTGATGAC 0.0
GCCCCGAGTATCCG 4.0
... ...
CATGCAGAGTCGAG 1.0
TTCTTAATAACGAT 0.0
CTCCAGTATCGATA 0.0
GGCTCTGAAGTATA 0.0
GCTGATGAGAATGT 0.0
[297240 rows x 1 columns]
ATGTATACGGAAGG CCTCGGCAAACCGC CGGCCATACGAATA TGTGTCCTGATGAC \
PMEL 0 0 0 0
GCCCCGAGTATCCG GTGCGGATGGGTTC CTCCTGGAGTGGCA CAGTATGGCTGGCT \
PMEL 4 1 2 2
TGGCGGCTGAACCT GCAGCGGCGCCTTT ... ACATACACGAACTG TCCCATAAGTAACT \
PMEL 3 0 ... 0 0
AGAACAACAGACCA CAGGAAAGTAAGTG CACATAAACAAAAT CATGCAGAGTCGAG \
PMEL 0 0 0 1
TTCTTAATAACGAT CTCCAGTATCGATA GGCTCTGAAGTATA GCTGATGAGAATGT
PMEL 0 0 0 0
[1 rows x 297240 columns]
In [18]:
birc5_matching
Out[18]:
| BIRC5 | |
|---|---|
| ATGTATACGGAAGG | 0.0 |
| CCTCGGCAAACCGC | 0.0 |
| CGGCCATACGAATA | 0.0 |
| TGTGTCCTGATGAC | 0.0 |
| GCCCCGAGTATCCG | 4.0 |
| ... | ... |
| CATGCAGAGTCGAG | 1.0 |
| TTCTTAATAACGAT | 0.0 |
| CTCCAGTATCGATA | 0.0 |
| GGCTCTGAAGTATA | 0.0 |
| GCTGATGAGAATGT | 0.0 |
297240 rows × 1 columns
In [29]:
Mel_uTARs
Out[29]:
AnnData object with n_obs × n_vars = 297240 × 13
obs: 'imagecol', 'imagerow'
uns: 'spatial'
obsm: 'spatial'
In [125]:
import numpy as np
import pandas as pd
# 1. Extract BIRC5 counts from the first AnnData (adata1)
if 'BIRC5' in curio_genes_mel.var_names:
birc5_counts = curio_genes_mel[:, 'BIRC5'].X
birc5_df = pd.DataFrame(birc5_counts.toarray(), index=curio_genes_mel.obs.index, columns=['BIRC5'])
else:
print("BIRC5 is not found in adata1.")
# 2. Ensure that only the matching obs (cells) between adata1 and adata2 are selected
# We perform an inner join on the index to match the cells in both AnnData objects.
common_obs = Mel_uTARs.obs.index.intersection(birc5_df.index)
# 3. Filter the BIRC5 data to only the matching observations
birc5_matching = birc5_df.loc[common_obs]
# 4. Add BIRC5 counts as a new column to the adata2.obs dataframe
Mel_uTARs.obs['BIRC5'] = np.nan # Initialize the column with NaNs
Mel_uTARs.obs.loc[common_obs, 'BIRC5'] = birc5_matching['BIRC5'].values # Add BIRC5 counts for matching cells
# 5. Verify the results
print(Mel_uTARs.obs[['BIRC5']].dropna())
BIRC5 ATGTATACGGAAGG 0.0 CCTCGGCAAACCGC 0.0 CGGCCATACGAATA 0.0 TGTGTCCTGATGAC 0.0 GCCCCGAGTATCCG 2.0 ... ... CATGCAGAGTCGAG 0.0 TTCTTAATAACGAT 0.0 CTCCAGTATCGATA 0.0 GGCTCTGAAGTATA 0.0 GCTGATGAGAATGT 0.0 [297240 rows x 1 columns]
In [42]:
B=Mel_uTARs.obs[['BIRC5']].dropna()
B=B.T
B = B.astype(int)
B
Out[42]:
| ATGTATACGGAAGG | CCTCGGCAAACCGC | CGGCCATACGAATA | TGTGTCCTGATGAC | GCCCCGAGTATCCG | GTGCGGATGGGTTC | CTCCTGGAGTGGCA | CAGTATGGCTGGCT | TGGCGGCTGAACCT | GCAGCGGCGCCTTT | ... | ACATACACGAACTG | TCCCATAAGTAACT | AGAACAACAGACCA | CAGGAAAGTAAGTG | CACATAAACAAAAT | CATGCAGAGTCGAG | TTCTTAATAACGAT | CTCCAGTATCGATA | GGCTCTGAAGTATA | GCTGATGAGAATGT | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| BIRC5 | 0 | 0 | 0 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
1 rows × 297240 columns
In [ ]:
In [128]:
B=Mel_uTARs.obs[['BIRC5']].dropna()
B=B.loc[c5_pos,:]
B=B.T
B = B.astype(int)
B
Out[128]:
| GTGCGCCCTCGCAA | TAGTAACCTGACTG | CGTGAAGCGCAAGC | CACGCCTGTAGAGG | TGCGGCAAGTTAGT | CCCTCAAAGTTTGA | GCGGCAGTTGACCG | TAAGCGAGTGTTGG | CGCTTGGGTAGCAT | TGCGCTATTCTAAC | ... | GTCGCGCTCGTCCG | ACTATGTTAACTCG | ATTATGAAGGCCTA | ATAGTTGCAAGGTG | CCTACGTCCTATTA | AAGGATGCCCCACG | AGATAAGGCGCTGT | AGCTCTTATTTCCG | ATCAGTAGCTATCA | GATGAGTGATGCAA | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| BIRC5 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
1 rows × 24 columns
In [121]:
c5_pos
Out[121]:
Index(['GTGCGCCCTCGCAA', 'TAGTAACCTGACTG', 'CGTGAAGCGCAAGC', 'CACGCCTGTAGAGG',
'TGCGGCAAGTTAGT', 'CCCTCAAAGTTTGA', 'GCGGCAGTTGACCG', 'TAAGCGAGTGTTGG',
'CGCTTGGGTAGCAT', 'TGCGCTATTCTAAC', 'TGCCTGCGGTCGTG', 'TACACCGAGATAGC',
'TGCTCTTCCAACAG', 'CTGGACTTCTGCTC', 'GTCGCGCTCGTCCG', 'ACTATGTTAACTCG',
'ATTATGAAGGCCTA', 'ATAGTTGCAAGGTG', 'CCTACGTCCTATTA', 'AAGGATGCCCCACG',
'AGATAAGGCGCTGT', 'AGCTCTTATTTCCG', 'ATCAGTAGCTATCA', 'GATGAGTGATGCAA'],
dtype='object')
In [ ]:
In [43]:
B.to_csv("BIRC5_counts.txt", sep="\t")
In [66]:
#gene_uTAR_merged.obsm["gpd"]
In [11]:
sc.pl.spatial(Mel_uTARs,alpha_img=0.6, color =['BIRC5','chr5_11929999_11935299_+_2148_0','chr10_37409199_37411549_+_142_0'], spot_size=250,cmap='Reds',vmax=0.5)
Morans Mel¶
In [79]:
#spatial.iloc[1:5,:]
#spatial=spatial.set_index('BC')
spatialm=pd.read_csv('spatial_row_col', sep="\t", header=0)
spatialm.index =Mel_uTARs.obs.index
#data.obs=spatial.iloc[:,[0,1,2,3,4]]
from typing import Literal
library_id='M'
#data.obs.iloc[1:5,:]
_QUALITY = Literal["fulres", "hires", "lowres"]
quality: _QUALITY = "hires"
scale = Mel_uTARs.uns["spatial"][library_id]["scalefactors"][
"tissue_" + quality + "_scalef"]
image_coor = Mel_uTARs.obsm["spatial"] * scale
spatialm["imagecol"] = image_coor[:, 1]*0.208
spatialm["imagerow"] = image_coor[:, 0]*0.208
#s4 0.208 s6 0.2025
Mel_uTARs.obsm["gpd"] = gpd.GeoDataFrame(spatialm.iloc[:,:],geometry=gpd.points_from_xy(y=spatialm.imagerow,x=spatialm.imagecol))
In [80]:
# Calculate the total counts for each observation (cell)
obs_counts = Mel_uTARs.X.sum(axis=1)
# Keep only observations (cells) with counts >= 50
Mel_uTARs_f = Mel_uTARs[obs_counts >= 1].copy()
In [81]:
from shapely import wkt
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point
import matplotlib.pyplot as plt
from esda.moran import Moran, Moran_Local
from splot.esda import moran_scatterplot, lisa_cluster
from libpysal.weights import Queen
import numpy as np
# Example: If you have coordinates in 'longitude' and 'latitude' columns
df = Mel_uTARs_f.obsm["gpd"]
# Convert the geometry column to shapely geometries
# Assuming df already contains shapely geometries in 'geometry' column
df['geometry'] = df['geometry'].apply(lambda x: Point(x) if not isinstance(x, Point) else x)
# Create a GeoDataFrame
geo_df = gpd.GeoDataFrame(df, geometry='geometry')
In [134]:
## CALC MORAN'S INDEX
epr_df = Mel_uTARs_f.to_df()
gene1_name="BIRC5"
gene2_name="chr5_11929999_11935299_+_2148_0"
Mel_uTARs_f.obsm["gpd"][gene1_name] = epr_df[gene1_name]
#y = pd.DataFrame(y[:,:1])
Mel_uTARs_f.obsm["gpd"][gene2_name] = epr_df[gene2_name]
#w = Queen.from_dataframe(Mel_uTARs_f.obsm["gpd"])
w = Queen.from_dataframe(geo_df)
x=np.array(epr_df[gene1_name]).astype(int) #need to set type to int or float
y=np.array(epr_df[gene2_name]).astype(int)
#calculate moran index
moran = Moran(y,w)
moran_loc_bv = Moran_Local_BV(y, x, w)
fig, ax = plt.subplots(figsize=(4,18))
moran_scatterplot(moran_loc_bv, p=0.05, ax=ax)
ax.set_xlabel('Gene {}'.format(gene1_name))
ax.set_ylabel('Gene {}'.format(gene2_name))
plt.tight_layout()
plt.show()
# Convert the DataFrame to a GeoDataFrame using the existing 'geometry' column
Mel_uTARs_f_gdf = gpd.GeoDataFrame(Mel_uTARs_f.obsm["gpd"], geometry='geometry')
fig, axs = plt.subplots(1, 1, figsize=(5, 8), subplot_kw={'adjustable':'datalim'})
lisa_cluster(moran_loc_bv,
Mel_uTARs_f_gdf,
p=0.05,
figsize=(9, 9),
markersize=0.05,
**{"alpha": 1},
ax=axs,
legend_kwds={'loc': 'upper left',
'bbox_to_anchor': (0.92, 0.8)})
axs.set_title(gene1_name + ' vs ' + gene2_name, y=0.75)
axs.set_axis_off()
plt.show()
/scratch/temp/10897584/ipykernel_3931819/859448017.py:11: FutureWarning: `use_index` defaults to False but will default to True in future. Set True/False directly to control this behavior and silence this warning w = Queen.from_dataframe(geo_df)
In [135]:
moran_loc_bv_q_df = pd.DataFrame(moran_loc_bv.q, columns=['moran_local_q'], index=Mel_uTARs_f.obs.index)
print(moran_loc_bv_q_df['moran_local_q'].value_counts())
Mel_uTARs_f.obs['moran_local_q']=moran_loc_bv_q_df['moran_local_q']
Mel_uTARs_f.obs['Coexpressed_spots'] = (Mel_uTARs_f.obs['moran_local_q'] == 1).astype(int)
Mel_uTARs_f.obs['Coexpressed_spots'] = Mel_uTARs_f.obs['moran_local_q'].apply(lambda x: 1 if x == 1 else 0)
Mel_uTARs_f.obs['Coexpressed_spots'].value_counts()
moran_local_q 3 6057 2 4238 1 12 4 12 Name: count, dtype: int64
Out[135]:
Coexpressed_spots 0 10307 1 12 Name: count, dtype: int64
In [99]:
sc.pl.spatial(
Mel_uTARs_f,
color='Coexpressed_spots', # Column with pre-defined colors
size=5, cmap='Reds' , vmax=1 # Size of spots
)
In [103]:
import numpy as np
import pandas as pd
import anndata
# Example AnnData object (adata) setup for illustration
# adata = anndata.AnnData(X, obs=obs_df, var=var_df)
# Assuming gene1 and gene2 are the names of the genes you are interested in
gene1 = 'BIRC5'
gene2 = 'chr5_11929999_11935299_+_2148_0'#'chr5_11929999_11935299_+_2148_0'
# Ensure that 'gene1' and 'gene2' are present in adata.var
if gene1 in Mel_uTARs_f.var_names and gene2 in Mel_uTARs_f.var_names:
# Create the new column 'coexpressed' in adata.obs
Mel_uTARs_f.obs['coexpressed'] = np.where(
(Mel_uTARs_f[:, gene1].X > 0) & (Mel_uTARs_f[:, gene2].X > 0), 1, 0
)
else:
raise ValueError(f"One or both genes '{gene1}' and '{gene2}' are not in adata.var_names")
# Verify the result
Mel_uTARs_f.obs['coexpressed'].value_counts()
Out[103]:
coexpressed 0 10319 Name: count, dtype: int64
In [77]:
moran_loc_bv._Moran_Local_BV__quads
Out[77]:
<bound method Moran_Local_BV.__quads of <esda.moran.Moran_Local_BV object at 0x7f7bb1736d90>>
In [36]:
# List all attributes and methods of the Moran_Local_BV object
print(dir(moran_loc_bv))
['EI_sim', 'Is', 'VI_sim', '_Moran_Local_BV__calc', '_Moran_Local_BV__quads', '__class__', '__delattr__', '__dict__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__le__', '__lt__', '__module__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__sizeof__', '__str__', '__subclasshook__', '__weakref__', '_statistic', 'by_col', 'den', 'geoda_quads', 'n', 'n_1', 'p_sim', 'p_z_sim', 'permutations', 'q', 'quads', 'rlisas', 'seI_sim', 'sim', 'w', 'x', 'y', 'z_sim', 'zx', 'zy']
In [ ]:
#HH_points = Mel_uTARs_f_gdf[moran_loc_bv.q == 'HH']
#HH_points
#print(dir(moran_loc_bv))
# Import necessary packages
import numpy as np
import pandas as pd
# Loop over attributes and print the first few rows if it's a valid type
for attr in dir(moran_loc_bv):
# Filter out special or callable attributes (like methods)
if not attr.startswith('_') and not callable(getattr(moran_loc_bv, attr)):
value = getattr(moran_loc_bv, attr)
# Print attribute name
print(f"Attribute: {attr}")
# Handle different data types and print first few rows
if isinstance(value, (pd.DataFrame, pd.Series)): # For pandas DataFrame or Series
print(value.head(), "\n")
elif isinstance(value, (np.ndarray, list)): # For numpy arrays or lists
print(np.array(value)[:5], "\n") # Print first 5 elements
else:
print(value, "\n") # Print the value as it is (if it's not a complex type)
In [32]:
fig, ax = plt.subplots(figsize=(4,20))
moran_scatterplot(moran_loc_bv, p=0.05, ax=ax)
ax.set_xlabel('Gene {}'.format(gene1_name))
ax.set_ylabel('Gene {}'.format(gene2_name))
plt.tight_layout()
plt.show()
In [38]:
moran_loc_bv.q
Out[38]:
array([3, 3, 3, ..., 3, 3, 3])
In [84]:
import pandas as pd
# Assuming moran_loc_bv.q is a NumPy array or list of the same length as adata.obs
# Convert moran_loc_bv.q to a DataFrame
moran_loc_bv_q_df = pd.DataFrame(moran_loc_bv.q, columns=['moran_local_q'], index=Mel_uTARs_f.obs.index)
# Ensure that the indices match between adata.obs and moran_loc_bv_q_df
assert all(Mel_uTARs_f.obs.index == moran_loc_bv_q_df.index), "Indices of adata.obs and moran_loc_bv_q_df do not match."
# Add the new data to adata.obs
Mel_uTARs_f.obs = Mel_uTARs_f.obs.join(moran_loc_bv_q_df)
# Verify the addition
print(Mel_uTARs_f.obs.head())
Mel_uTARs_f['moran_local_q'].value_counts()
--------------------------------------------------------------------------- ValueError Traceback (most recent call last) Cell In[84], line 11 8 assert all(Mel_uTARs_f.obs.index == moran_loc_bv_q_df.index), "Indices of adata.obs and moran_loc_bv_q_df do not match." 10 # Add the new data to adata.obs ---> 11 Mel_uTARs_f.obs = Mel_uTARs_f.obs.join(moran_loc_bv_q_df) 13 # Verify the addition 14 print(Mel_uTARs_f.obs.head()) File ~/.local/lib/python3.8/site-packages/pandas/core/frame.py:9729, in DataFrame.join(self, other, on, how, lsuffix, rsuffix, sort, validate) 9566 def join( 9567 self, 9568 other: DataFrame | Series | Iterable[DataFrame | Series], (...) 9574 validate: str | None = None, 9575 ) -> DataFrame: 9576 """ 9577 Join columns of another DataFrame. 9578 (...) 9727 5 K1 A5 B1 9728 """ -> 9729 return self._join_compat( 9730 other, 9731 on=on, 9732 how=how, 9733 lsuffix=lsuffix, 9734 rsuffix=rsuffix, 9735 sort=sort, 9736 validate=validate, 9737 ) File ~/.local/lib/python3.8/site-packages/pandas/core/frame.py:9768, in DataFrame._join_compat(self, other, on, how, lsuffix, rsuffix, sort, validate) 9758 if how == "cross": 9759 return merge( 9760 self, 9761 other, (...) 9766 validate=validate, 9767 ) -> 9768 return merge( 9769 self, 9770 other, 9771 left_on=on, 9772 how=how, 9773 left_index=on is None, 9774 right_index=True, 9775 suffixes=(lsuffix, rsuffix), 9776 sort=sort, 9777 validate=validate, 9778 ) 9779 else: 9780 if on is not None: File ~/.local/lib/python3.8/site-packages/pandas/core/reshape/merge.py:162, in merge(left, right, how, on, left_on, right_on, left_index, right_index, sort, suffixes, copy, indicator, validate) 131 @Substitution("\nleft : DataFrame or named Series") 132 @Appender(_merge_doc, indents=0) 133 def merge( (...) 146 validate: str | None = None, 147 ) -> DataFrame: 148 op = _MergeOperation( 149 left, 150 right, (...) 160 validate=validate, 161 ) --> 162 return op.get_result(copy=copy) File ~/.local/lib/python3.8/site-packages/pandas/core/reshape/merge.py:811, in _MergeOperation.get_result(self, copy) 807 self.left, self.right = self._indicator_pre_merge(self.left, self.right) 809 join_index, left_indexer, right_indexer = self._get_join_info() --> 811 result = self._reindex_and_concat( 812 join_index, left_indexer, right_indexer, copy=copy 813 ) 814 result = result.__finalize__(self, method=self._merge_type) 816 if self.indicator: File ~/.local/lib/python3.8/site-packages/pandas/core/reshape/merge.py:763, in _MergeOperation._reindex_and_concat(self, join_index, left_indexer, right_indexer, copy) 760 left = self.left[:] 761 right = self.right[:] --> 763 llabels, rlabels = _items_overlap_with_suffix( 764 self.left._info_axis, self.right._info_axis, self.suffixes 765 ) 767 if left_indexer is not None and not is_range_indexer(left_indexer, len(left)): 768 # Pinning the index here (and in the right code just below) is not 769 # necessary, but makes the `.take` more performant if we have e.g. 770 # a MultiIndex for left.index. 771 lmgr = left._mgr.reindex_indexer( 772 join_index, 773 left_indexer, (...) 778 use_na_proxy=True, 779 ) File ~/.local/lib/python3.8/site-packages/pandas/core/reshape/merge.py:2604, in _items_overlap_with_suffix(left, right, suffixes) 2601 lsuffix, rsuffix = suffixes 2603 if not lsuffix and not rsuffix: -> 2604 raise ValueError(f"columns overlap but no suffix specified: {to_rename}") 2606 def renamer(x, suffix): 2607 """ 2608 Rename the left and right indices. 2609 (...) 2620 x : renamed column name 2621 """ ValueError: columns overlap but no suffix specified: Index(['moran_local_q'], dtype='object')
In [ ]:
Mel_uTARs_f.obs['Coexpressed_spots'] = Mel_uTARs_f.obs['moran_local_q'].apply(lambda x: 10 if x == "1" else 0)
sc.pl.spatial(
Mel_uTARs_f,
color='Coexpressed_spots', # Column with pre-defined colors
size=2, cmap='Reds' # Size of spots
)
In [28]:
Mel_uTARs_f.obs['Coexpressed_spots'] = Mel_uTARs_f.obs['moran_local_q'].apply(lambda x: 10 if x == "1" else 0)
Mel_uTARs_f.obs['Coexpressed_spots'].value_counts()
Out[28]:
Coexpressed_spots 0 98086 Name: count, dtype: int64
In [18]:
import pandas as pd
# Convert moran_loc_bv.q to a DataFrame
moran_loc_bv_q_df = pd.DataFrame(moran_loc_bv.q, columns=['moran_local_q'], index=Mel_uTARs_f.obs.index)
# Check for column overlap in Mel_uTARs_f.obs
existing_columns = Mel_uTARs_f.obs.columns
if 'moran_local_q' in existing_columns:
# Rename the column in moran_loc_bv_q_df to avoid overlap
moran_loc_bv_q_df.rename(columns={'moran_local_q': 'moran_local_q_new'}, inplace=True)
# Add the new data to adata.obs
Mel_uTARs_f.obs = Mel_uTARs_f.obs.join(moran_loc_bv_q_df)
# Verify the addition
print(Mel_uTARs_f.obs.head())
imagecol imagerow moran_local_q GCCCCGAGTATCCG 5419.20 6997.75 3 GTGCGGATGGGTTC 5495.50 4708.40 3 CTCCTGGAGTGGCA 2456.50 4922.75 3 CAGTATGGCTGGCT 5641.55 6668.85 3 TGGCGGCTGAACCT 2643.40 4296.00 3
In [ ]:
In [41]:
Mel_uTARs_f.obs['moran_local_q'].unique()
Mel_uTARs_f.obs['moran_local_q_new'].unique()
Out[41]:
array([3, 2, 4, 1])
In [43]:
Mel_uTARs_f.obs['moran_local_q_new'].value_counts()
Out[43]:
moran_local_q 3 92372 2 5690 4 23 1 1 Name: count, dtype: int64
In [89]:
import scanpy as sc
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors
# Ensure 'moran_local_q_new' is categorical
Mel_uTARs_f.obs['moran_local_q_new'] = Mel_uTARs_f.obs['moran_local_q_new'].astype('category')
# Define the colors for the categories
colors = ['red', 'green', 'orange', 'blue']
categories = Mel_uTARs_f.obs['moran_local_q_new'].cat.categories
# Create a colormap and norm
cmap = mcolors.ListedColormap(colors)
norm = mcolors.BoundaryNorm(boundaries=range(len(categories)+1), ncolors=len(categories))
# Plot the spatial data
sc.pl.spatial(
Mel_uTARs_f,
alpha_img=0.6,
color='moran_local_q_new',
spot_size=100,
cmap=cmap,
norm=norm,
vmax=len(categories)-1
)
In [91]:
Co_exp = Mel_uTARs_f[Mel_uTARs_f.obs['moran_local_q_new'] == 1].copy()
sc.pl.spatial(
Co_exp,
alpha_img=0.6,
color='moran_local_q_new',
spot_size=200)
In [117]:
# Define the color map
colors = {
'1': 'red',
'2': 'lightgrey',
'3': 'lightgrey',
'4': 'lightgrey'
}
# Ensure 'moran_local_q_new' is in string format if necessary
Mel_uTARs_f.obs['moran_local_q_new'] = Mel_uTARs_f.obs['moran_local_q_new'].astype(str)
# Map colors correctly
Mel_uTARs_f.obs['moran_local_q_color'] = Mel_uTARs_f.obs['moran_local_q_new'].map(colors)
# Verify the result
print(Mel_uTARs_f.obs[['moran_local_q_new', 'moran_local_q_color']].head())
moran_local_q_new moran_local_q_color GCCCCGAGTATCCG 2 lightgrey GATAACGGCCGTCC 2 lightgrey GGTACCTCGTTGGA 3 lightgrey AAACTCACATTCTC 2 lightgrey CACGGATGGACAGT 3 lightgrey
In [122]:
# Map the colors to the combined clusters
#Mel_uTARs_f.obs['moran_local_q_color'] = Mel_uTARs_f.obs['moran_local_q_new'].astype(str).map(colors)
# Plot using sc.pl.spatial with the custom color column
sc.pl.spatial(
Mel_uTARs_f,
color='moran_local_q_new', # Column with pre-defined colors
size=5, # Size of spots
)
In [67]:
Mel_uTARs_f.obs['Coexpressed_spots'] = Mel_uTARs_f.obs['moran_local_q'].apply(lambda x: 10 if x == "1" else 0)
sc.pl.spatial(
Mel_uTARs_f,
color='Coexpressed_spots', # Column with pre-defined colors
size=5, cmap='Reds' , vmax=1 # Size of spots
)
In [127]:
Mel_uTARs_f.obs['moran_local_q_reversed'].value_counts()
Out[127]:
moran_local_q_reversed 0 10319 Name: count, dtype: int64
In [135]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import geopandas as gpd
from splot.esda import moran_scatterplot, lisa_cluster
from libpysal.weights import Queen
from esda.moran import Moran, Moran_Local
# Convert the AnnData object to DataFrame
epr_df = Mel_uTARs_f.to_df()
# Define gene names
gene1_name = "BIRC5"
gene2_name = "chr5_11929999_11935299_+_2148_0"
# Add gene expression data to obsm
Mel_uTARs_f.obsm["gpd"][gene1_name] = epr_df[gene1_name]
Mel_uTARs_f.obsm["gpd"][gene2_name] = epr_df[gene2_name]
# Extract the geometry data
geo_df = Mel_uTARs_f.obsm["gpd"].copy()
# Ensure the geometry column is correctly formatted
if not isinstance(geo_df['geometry'].iloc[0], (Point,)):
raise ValueError("Geometry column should contain Shapely Point objects. Please convert the data accordingly.")
# Create a GeoDataFrame
geo_df = gpd.GeoDataFrame(geo_df, geometry='geometry')
# Calculate spatial weights
w = Queen.from_dataframe(geo_df)
# Extract gene expression data
x = np.array(epr_df[gene1_name]).astype(float)
y = np.array(epr_df[gene2_name]).astype(float)
# Calculate Moran's Index
moran = Moran(y, w)
moran_loc_bv = Moran_Local(y, w)
# Plot Moran's scatterplot
fig, ax = plt.subplots(figsize=(4, 18))
moran_scatterplot(moran_loc_bv, p=0.05, ax=ax)
ax.set_xlabel(f'Gene {gene1_name}')
ax.set_ylabel(f'Gene {gene2_name}')
plt.tight_layout()
plt.show()
# Plot spatial correlation
fig, axs = plt.subplots(1, 1, figsize=(9, 9), subplot_kw={'adjustable': 'datalim'})
lisa_cluster(
moran_loc_bv,
geo_df,
p=0.05, # Adjust p-value threshold if needed
markersize=5, # Adjust markersize for better visibility
alpha=0.7, # Adjust alpha for better visibility
ax=axs,
legend_kwds={'loc': 'upper left', 'bbox_to_anchor': (0.92, 0.8)}
)
axs.set_title(f'{gene1_name} vs {gene2_name}', y=0.75)
axs.set_axis_off()
plt.show()
/scratch/temp/10858931/ipykernel_3186245/383420554.py:31: FutureWarning: `use_index` defaults to False but will default to True in future. Set True/False directly to control this behavior and silence this warning w = Queen.from_dataframe(geo_df)
In [137]:
print(f"Min: {moran_loc_bv.q.min()}, Max: {moran_loc_bv.q.max()}")
Min: 2, Max: 4
In [139]:
moran_loc_bv.q.shape
--------------------------------------------------------------------------- TypeError Traceback (most recent call last) Cell In[139], line 1 ----> 1 moran_loc_bv.q.shape() TypeError: 'tuple' object is not callable
PMEL¶
In [30]:
## CALC MORAN'S INDEX
epr_df = Mel_uTARs_f.to_df()
gene1_name="PMEL"
gene2_name="chr10_31969999_31970199_+_12945_0"
Mel_uTARs_f.obsm["gpd"][gene1_name] = epr_df[gene1_name]
#y = pd.DataFrame(y[:,:1])
Mel_uTARs_f.obsm["gpd"][gene2_name] = epr_df[gene2_name]
#w = Queen.from_dataframe(Mel_uTARs_f.obsm["gpd"])
w = Queen.from_dataframe(geo_df)
x=np.array(epr_df[gene1_name]).astype(int) #need to set type to int or float
y=np.array(epr_df[gene2_name]).astype(int)
#calculate moran index
moran = Moran(y,w)
moran_loc_bv = Moran_Local_BV(y, x, w)
fig, ax = plt.subplots(figsize=(4,18))
moran_scatterplot(moran_loc_bv, p=0.05, ax=ax)
ax.set_xlabel('Gene {}'.format(gene1_name))
ax.set_ylabel('Gene {}'.format(gene2_name))
plt.tight_layout()
plt.show()
/scratch/temp/10869136/ipykernel_1006190/1010272377.py:11: FutureWarning: `use_index` defaults to False but will default to True in future. Set True/False directly to control this behavior and silence this warning w = Queen.from_dataframe(geo_df)
In [36]:
import geopandas as gpd
import geopandas as gpd
# Convert the DataFrame to a GeoDataFrame using the existing 'geometry' column
Mel_uTARs_f_gdf = gpd.GeoDataFrame(Mel_uTARs_f.obsm["gpd"], geometry='geometry')
fig, axs = plt.subplots(1, 1, figsize=(5, 8),
subplot_kw={'adjustable':'datalim'})
lisa_cluster(moran_loc_bv,
Mel_uTARs_f_gdf,
p=0.05,
figsize=(9, 9),
markersize=0.15,
**{"alpha": 1},
ax=axs,
legend_kwds={'loc': 'upper left',
'bbox_to_anchor': (0.92, 0.8)})
axs.set_title(gene1_name + ' vs ' + gene2_name, y=0.75)
axs.set_axis_off()
plt.show()
In [ ]:
In [ ]: